File set-up
Set working directory to current directory
# Point the working directory at the folder that contains this script so the
# relative paths used further down (e.g. "../data/...") resolve.
# Only attempted when the rstudioapi package is installed and RStudio is
# running; otherwise the working directory is left untouched.
if (requireNamespace("rstudioapi", quietly = TRUE) && rstudioapi::isAvailable()) {
  script_path <- rstudioapi::getActiveDocumentContext()$path
  # The active context can report an empty path (e.g. an unsaved document);
  # dirname("") is "" and setwd("") raises an error, so skip that case.
  if (nzchar(script_path)) {
    setwd(dirname(script_path))
  }
}
Load standard libraries and resolve conflicts
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(conflicted)
# Resolve name clashes up front: for each of these function names, tell
# conflicted to pick the dplyr implementation over any other package.
invisible(lapply(
  c("filter", "select", "slice", "rename", "intersect"),
  conflict_prefer,
  winner = "dplyr"
))
## [conflicted] Will prefer dplyr::filter over any other package
## [conflicted] Will prefer dplyr::select over any other package
## [conflicted] Will prefer dplyr::slice over any other package
## [conflicted] Will prefer dplyr::rename over any other package
## [conflicted] Will prefer dplyr::intersect over any other package
Read data and set the tool order
# Import the tab-separated table of all reported circRNAs; column types are
# left for readr to guess.
all_circ <- read_tsv(file = "../data/Supplementary_Table_2_all_circRNAs.txt")
## Rows: 1137099 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (16): chr, strand, cell_line, tool, circ_id, circ_id_strand, count_group...
## dbl (7): start, end, BSJ_count, n_detected, n_db, estim_len_in, BSJ_count_m...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Store `tool` as a factor with an explicit level order, so every table and
# plot derived from `all_circ` lists the tools in this same sequence.
all_circ[["tool"]] <- factor(
  all_circ[["tool"]],
  levels = c(
    "circseq_cup", "CIRI2", "CIRIquant", "CircSplice",
    "find_circ", "CirComPara2", "CIRCexplorer3", "circtools",
    "Sailfish-cir", "NCLscan", "NCLcomparator", "PFv2",
    "ecircscreen", "KNIFE", "circRNA_finder", "segemehl"
  )
)
# Print the table for a quick visual check of the loaded data.
all_circ
Figure 2C & Sup Figure 4
# Count the rows of `all_circ` for every circRNA (chr/start/end/circ_id) within
# each cell line and store the count as `n_detected`.
# count() groups, tallies and returns an ungrouped table in one step, so there
# is no implicit `.groups` default (and no message about it) and no trailing
# ungroup() to forget.
nr_detected <- all_circ %>%
  dplyr::count(chr, start, end, circ_id, cell_line, name = "n_detected")
# Tag every row of `all_circ` with a support bin (`n_detected_group`) derived
# from `n_detected`.
#
# `all_circ` already contains an `n_detected` column, so it is one of the
# columns shared with `nr_detected`. The join keys are spelled out to make that
# visible and to avoid the "Joining, by = ..." message.
# NOTE(review): because `n_detected` is itself a join key, this join adds no
# columns; the bins below are computed from the `n_detected` values that came
# with the input table, not from the recomputed counts — confirm this is
# intended.
n_detected_per_tool <- all_circ %>%
  left_join(
    nr_detected,
    by = c("chr", "start", "end", "cell_line", "circ_id", "n_detected")
  ) %>%
  mutate(
    # The first matching condition wins, so bins run from broadest support
    # down. Rows matching no condition (n_detected < 1 or NA) stay NA.
    n_detected_group = case_when(
      # "all tools" = as many detections as there are tool levels.
      n_detected == nlevels(tool) ~ "all tools",
      n_detected > 9 ~ "≥ 10 tools",
      n_detected > 5 ~ "6-9 tools",
      # This bin holds 3 to 5 tools; exactly 2 is handled by the next bin.
      n_detected > 2 ~ "3-5 tools",
      n_detected > 1 ~ "2 tools",
      n_detected == 1 ~ "unique"
    ),
    # Fixed level order; this controls the legend and stacking order.
    n_detected_group = factor(
      n_detected_group,
      levels = c(
        "all tools", "≥ 10 tools", "6-9 tools",
        "3-5 tools", "2 tools", "unique"
      )
    )
  )
# `tool` is already a factor with the intended level order (set right after
# loading `all_circ`) and is carried through the join unchanged, so it is not
# re-levelled here.
# Stacked bar chart: one bar per tool, bar height is the number of rows for
# that tool, and the fill shows the support bin of each circRNA.
# The two commented-out lines are optional toggles (keep only the HLF cell
# line / draw one panel per cell line); presumably they switch between the
# main and the supplementary figure — TODO confirm.
n_detected_per_tool %>%
  #filter(cell_line == "HLF") %>%
  ggplot(mapping = aes(x = tool, fill = n_detected_group)) +
  geom_bar() +
  # Project theme first, so the adjustments below are applied on top of it.
  mytheme_discrete_x +
  #facet_wrap(~cell_line) +
  scale_y_continuous(labels = scales::comma_format()) +
  scale_fill_manual(
    name = "circRNAs detected by",
    values = c(
      "#D55E00", "#0072B2", "#00B9F2",
      "#00A875", "#E69F00", "#CC79A7"
    )
  ) +
  labs(y = "number of circRNAs") +
  theme(axis.title.x = element_blank())

#ggsave('separate_figures/figure_2C.pdf', width = 10, height = 9, units = "cm")
#ggsave('../supplemental/sup_figures/sup_figure_4.pdf', width = 21, height = 12, units = "cm")